###############################################################################
# Main Analysis I Appendix 2019 Only
###############################################################################
###############################################################################
# Models reported in the Appendix Text, including their Tables
###############################################################################
###############################################################################
# Content
###############################################################################
# 1) Dependencies
# 2) Load Data
# 3) Generate aggregated hits and documents per candidate for 2015 vs 2019
# 4) Models
# 5) Save Model Output in Tables
###############################################################################
# 1) Dependencies
###############################################################################
library(readr)
library(dplyr)
library(ggplot2)
library(gganimate)
library(ggeffects)
library(ggExtra)
library(ggridges)
library(ggrepel)
library(grid)
library(scales)
library(lubridate)
library(extrafont)
library(reshape2)
library(here)
library(ggforce)
library(png)
library(readxl)
library(grid)
library(gridExtra)
library(ggpubr)
library(sjPlot)
library(ggplot2)
library(lme4)
library(effects)
library(texreg)
library(MASS)
###############################################################################
# 2) Load Data
###############################################################################
# Set Path
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
rm(list=ls())

# Options and Seed
options(stringsAsFactors = F)
set.seed(0213)

# Custom functions
# ggplot rescale x axis....
scale_x_reordered <- function(..., sep = "___") {
  reg <- paste0(sep, ".+$")
  ggplot2::scale_x_discrete(labels = function(x) gsub(reg, "", x), ...)
}
# ggplot order over facets...
reorder_within <- function(x, by, within, fun = mean, sep = "___", ...) {
  new_x <- paste(x, within, sep = sep)
  stats::reorder(new_x, by, FUN = fun)
}

# Source Theme for Figures
source('ggplot_theme_ddl.R', encoding = "UTF-8")

# Load Data
df <- readRDS("../data/smd_ner_2015_2019_combined.RDS")
candidates_list_15 <- read.csv('../support/candidates-2015/00-Named_Entity_List_withID.csv', stringsAsFactors = F) %>% 
  as_tibble %>% mutate(id=as.character(id))
candidates_list_19 <- read.csv('../support/candidates-2019/00-Named_Entity_List_withID.csv', stringsAsFactors = F) %>% 
  as_tibble %>% mutate(id=as.character(id))

candidates_list_19 <- candidates_list_19 %>% mutate(candidacy = as.character(gsub("\\s", " ", council))) %>% 
  mutate(council = case_when(candidacy %in% c("SR", "Former Staenderat", "Former Staenderat") ~ "sr",
                             candidacy %in% c("NR", "Former Nationalrat", "Former Nationalrat") ~  "nr",
                             candidacy %in% c("SR und NR", "NR und SR") ~ "sr & nr")) %>% 
  dplyr::select(-c(candidacy))

df$year <- as.character(df$year)
df$date <- format(as.Date(df$date, "%m-%d"), format = "%m-%d")
df$fullname <- ifelse(df$fullname == "Adèle Goumaz", "Adèle Thorens Goumaz", df$fullname)
df$fullname <- ifelse(df$fullname == "Niklaus-Samuel Gugger", "Nik Gugger", df$fullname)
df$incumbent <- ifelse(df$fullname == "Philipp Müller", 1, df$incumbent)

#-----------------------------------------------------------------------------#
# Configurations
#-----------------------------------------------------------------------------#
# Remove Federal Councilors
council <- TRUE
# Remove Party Presidents
president <- TRUE

# Unwanted Topics
unwanted_topics <- c('PoliticalSystem', 'Other_unclassified_Political_Texts', 
                     'NotPolitical', 'Not Classified', 'Other_Problems')

# Council members 2015
council_15 <- c("Ueli Maurer", "Alain Berset", "Didier Burkhalter", 
                "Simonetta Sommaruga", "Eveline Widmer Schlumpf", 
                "Johann Schneider-Ammann", "Doris Leuthard")

# Council members 2019
council_19 <- c("Ueli Maurer", "Alain Berset", "Ignazio Cassis", 
                "Simonetta Sommaruga", "Guy Parmelin",
                "Karin Keller-Sutter", "Viola Amherd")

# Party Presidents 2015
presi_15 <- c("Toni Brunner", "Christian Levrat", "Philipp Müller", 
              "Christophe Darbellay", "Regula Rytz", "Martin Bäumle", "Martin Landolt")

# Party Presidents 2019
presi_19 <- c("Albert Rösti", "Christian Levrat", "Petra Gössi", 
              "Gerhard Pfister", "Regula Rytz", "Jürg Grossen", "Martin Landolt")

# Remove Council Members:
if(council == T){
  df <- df %>% dplyr::filter((year == "2015" & !fullname %in% council_15) |
                               (year == "2019" & !fullname %in% council_19))
}


# Remove Party Presidents:
if(president == T){
  df <- df %>% dplyr::filter((year == "2015" & !fullname %in% presi_15) |
                               (year == "2019" & !fullname %in% presi_19))
}

# Values for colors
values_gender_1 <- c("f" = "#DD2461", "m" = "#7D7D7C")
shapes_gender_1 <- c("f" = 1, "m" = 5)
.fill2 <- unlist(colourList[['colour']][['parties']])
names(.fill2) <- toupper(names(.fill2))
.fill2[22] <- "grey"
names(.fill2) <- c(names(.fill2)[1:21],"Rest")
values_year_2 <- c("2015" = "#DD2461", "2019" = "#7D7D7C")
values_gender_2 <- c("Women" = "#DD2461", "Men" = "#7D7D7C")

# Remove small newspapers that are not of regional importance 
sort(unique(df$so_txt))

regional_national_newspapers <- c(
                                  "20 minuten", #National Newspaper (DE) 
                                  "20 minutes", #National Newspaper (FR)
                                  "20 minuti", #National Newspaper (IT)
                                  "24 heures", #National Newspaper (FR)
                                  "srf.ch", #National Newspaper (DE)
                                  "rts.ch", #National Newspaper (FR)
                                  "Aargauer Zeitung / MLZ", #Big Regional Newspaper (DE)
                                  "Arcinfo", #Medium Regional Newspaper (FR)
                                  "Basler Zeitung", #Big Regional Newspaper (DE)
                                  "Berner Zeitung", #Big Regional Newspaper (DE)
                                  "Blick", #National Newspaper (DE)
                                  "Bote der Urschweiz", #Medium Regional Newspaper (DE)
                                  "Bündner Tagblatt", #Medium Regional Newspaper (DE)
                                  "Der Bund", #National Newspaper (DE)
                                  "Der Landbote", #Medium Regional Newspaper (DE)
                                  "Die Weltwoche", #National Newspaper (DE)
                                  "Die Wochenzeitung", #National Newspaper (DE)
                                  "Finanz und Wirtschaft", #National Newspaper (DE)
                                  "La Liberté", #Medium Regional Newspaper (FR)
                                  "Le Matin", #National Newspaper (FR)
                                  "Le Matin Dimanche", #National Newspaper (FR)
                                  "Le Temps", #National Newspaper (FR)
                                  "Luzerner Zeitung", #Big Regional Newspaper (DE)
                                  "Neue Luzerner Zeitung", #Big Regional Newspaper (DE)
                                  "Neue Zürcher Zeitung", #National Newspaper (DE)
                                  "NZZ am Sonntag", #National Newspaper (DE)
                                  "Ostschweiz am Sonntag", #National Newspaper (DE)
                                  "Schweiz am Sonntag / MLZ", #National Newspaper (DE)
                                  "Sonntagsblick", #National Newspaper (DE)
                                  "SonntagsZeitung", #National Newspaper (DE)
                                  "St. Galler Tagblatt", #Big Regional Newspaper (DE)
                                  "Südostschweiz", #Medium Regional Newspaper (DE)
                                  "swissinfo.ch", #National Newspaper (DE)
                                  "Tages-Anzeiger", #National Newspaper (DE)
                                  "Tribune de Genève", #Big Regional Newspaper (FR)
                                  "watson.ch", #National Newspaper (DE)
                                  "www.20minutes.ch", #National Newspaper (DE)
                                  "www.lematin.ch" #National Newspaper (FR)
                                  )

df <- df %>% filter(so_txt %in% regional_national_newspapers)
unique(df$so_txt)
###############################################################################
# 3) Generate aggregated hits and documents per candidates for 2015 vs 2019
###############################################################################
df_hits_f <- df %>% group_by(year, fullname, person.id, gender, party, canton, list_place_1, age, incumbent, selectsclass, council) %>% dplyr::summarise(n.hits = n())
unique(df_hits_f$council)

# Check if council NA are just the articles with no mention at all:
help_council_na <- df_hits_f %>% filter(is.na(council) == T)
unique(help_council_na$fullname)

# Add Candidates with zero mentions via the lists of candidates:
names(df_hits_f)
sapply(df_hits_f, mode)

helper <- filter(df_hits_f, year == "2015")
candidates_list_15 <- candidates_list_15 %>% filter(!id %in% helper$person.id)

helper <- filter(df_hits_f, year == "2019")
candidates_list_19 <- candidates_list_19 %>% filter(!id %in% helper$person.id)


candidates_list_15 <- candidates_list_15 %>% dplyr::select(c(age,district,fullname,gender,incumbent,list_place_1,party,id,council)) %>% 
  dplyr::mutate(person.id = id,
                canton = district,
                n.hits = 0,
                selectsclass = NA,
                year = "2015") %>% 
  dplyr::select(-c("district","id"))



candidates_list_19 <- candidates_list_19 %>% dplyr::select(c(age,district,fullname,gender,incumbent,list_place_1,party,id,council)) %>% 
  dplyr::mutate(person.id = id,
                canton = district,
                n.hits = 0,
                selectsclass = NA,
                year = "2019") %>% 
  dplyr::select(-c("district","id"))

#Check if we have candidates where we don't know the chamber:
unique(candidates_list_15$council)
unique(candidates_list_19$council)

df_hits_f <- dplyr::bind_rows(df_hits_f,candidates_list_15,candidates_list_19)

df_hits_15 <- df_hits_f %>% filter(year == "2015") %>% group_by(fullname, person.id, gender, party, canton, list_place_1, age, incumbent, council) %>%  
  tidyr::complete(selectsclass = unique(df_hits_f$selectsclass), fill = list(n.hits = 0,
                                                                             year = "2015"))
# Complete Selectsclass for each Person in each Year
df_hits_19 <- df_hits_f %>% filter(year == "2019") %>% group_by(fullname, person.id, gender, party, canton, list_place_1, age, incumbent, council) %>%  
  tidyr::complete(selectsclass = unique(df_hits_f$selectsclass), fill = list(n.hits = 0,
                                                                             year = "2019"))
# Sanity Check
length(unique(df_hits_19$person.id))
length(unique(df_hits_15$person.id))

# Combine the completed dfs
df_hits_f <- dplyr::bind_rows(df_hits_15,df_hits_19)

# Remove the Class NA
df_hits_f <- df_hits_f %>% filter(is.na(selectsclass) == F) %>% filter(is.na(person.id) == F)

# Fix the Canton with a Typo!
df_hits_f <- df_hits_f %>% mutate(canton = ifelse(canton == "St.Gallen", "St. Gallen", canton))
sort(unique(df_hits_f$canton))

# Sanity Check 
sanity <- df_hits_f %>% group_by(year, selectsclass) %>% dplyr::summarise(n = n())
sanity
###############################################################################
# 4) Models
###############################################################################
# Factorize some variables
df_hits_f$party <- as.factor(df_hits_f$party)
df_hits_f$canton <- as.factor(df_hits_f$canton)
df_hits_f$list_place <- 0
df_hits_f$list_place[df_hits_f$list_place_1 %in% c(1,2,3)] <- 1
df_hits_f$age_sq <- df_hits_f$age * df_hits_f$age
df_hits_f$incumbent <- as.factor(df_hits_f$incumbent)
df_hits_f$gender <- as.factor(df_hits_f$gender)

df_hits_agg <- df_hits_f %>% ungroup() %>% dplyr::select(-c(`person.id`)) %>% 
  group_by(year, fullname, gender, party, canton, list_place_1, list_place, age, age_sq, incumbent, council) %>% 
  dplyr::select(-selectsclass) %>%
  summarise_all(sum) %>%
  mutate(standerat=as.factor(ifelse(council == "nr","Nationalrat","Ständerat"))) %>%
  mutate(list_place_1=ifelse(is.na(list_place_1),1,list_place_1)) %>%
  filter(is.na(fullname)==F)

df_hits_2015 <- df_hits_agg %>% filter(year == "2015")
df_hits_2019 <- df_hits_agg %>% filter(year == "2019")

pred_df <- expand.grid(gender=unique(df_hits_agg$gender),
                       year=unique(df_hits_agg$year),
                       party="SP",
                       list_place_1=mean(df_hits_agg$list_place_1,),
                       canton="Zürich",
                       standerat=unique(df_hits_agg$standerat),
                       incumbent=unique(df_hits_agg$incumbent))


# ----------------------------------------------------------------------------#
# Negative Binomial Regression Models
# ----------------------------------------------------------------------------#
hits.base <- glm.nb(n.hits~gender*year+party,data=df_hits_agg)
summary(hits.base)

hits.chamber <- glm.nb(n.hits~gender*year+party+standerat,data=df_hits_agg)
summary(hits.chamber)

hits.controls <- glm.nb(n.hits~gender*year+party+standerat+list_place_1+canton+incumbent,data=df_hits_agg)
summary(hits.controls)


hits.controls_inc <- glm.nb(n.hits~gender*year+party+standerat+list_place_1+canton,data=filter(df_hits_agg, incumbent == 1))
summary(hits.controls_inc)

hits.controls_noinc <- glm.nb(n.hits~gender*year+party+standerat+list_place_1+canton,data=filter(df_hits_agg, incumbent == 0))
summary(hits.controls_noinc)

# ----------------------------------------------------------------------------#
# Negative Binomial Multilevel Regression Models (Canton Only)
# ----------------------------------------------------------------------------#
set.seed(0213)
control = glmerControl(optimizer = "nlminbwrap", optCtrl = list(maxfun = 2e5))
# Scale some Variables for better convergence...
df_hits_agg_scaled <- df_hits_agg

# No scaling, since all variables are not very scaleable!

# Base Model
hits.base.ml <- glmer.nb(n.hits ~ gender * year + party + (1|canton), data=df_hits_agg_scaled)
summary(hits.base.ml)

# Chamber Model
hits.chamber.ml <- glmer.nb(n.hits ~ gender * year + party + standerat + (1|canton), data=df_hits_agg_scaled)
summary(hits.chamber.ml)

# Controls Model
hits.controls.ml <- glmer.nb(n.hits ~ gender * year + party + standerat + list_place_1 + incumbent + (1|canton), data=df_hits_agg_scaled)
summary(hits.controls.ml)

###############################################################################
# 5) Save Model Output in Tables
###############################################################################
texreg::texreg(list(hits.base,hits.base.ml,hits.controls,hits.controls.ml,hits.controls_inc,hits.controls_noinc),omit.coef="canton|party",#reorder.coef=c(2,3,4,5,6,7,1),
               custom.gof.rows = list("Multilevel" = c("", "\\checkmark", "", "\\checkmark", "", ""), 
                                      "Party FEs" = c("\\checkmark", "\\checkmark", "\\checkmark", "\\checkmark", "\\checkmark", "\\checkmark"),
                                      "Cantonal FEs"=c("", "",  "\\checkmark", "", "\\checkmark", "\\checkmark")),
               file="../tables_appendix/baseline_new_big_newspapers_only.tex",
               custom.coef.map = list("genderm"="Male",
                                      "year2019"="2019",
                                      "genderm:year2019"="Male*2019",
                                      "standeratStänderat"="Council of States",
                                      "list_place_1"="Top list",
                                      "incumbent1"="incumbent",
                                      "(Intercept)"="(Intercept)"),
               label="table:results_mentions",
               float.pos="h")



